# first thing to do is importing the data
# install.packages("readxl")
readxl::read_excel("data_dec/water_stress.xlsx")
## # A tibble: 1,260 × 23
## Group Week Date Species PlantId Use Treat…¹ Soil_…² Elect…³
## <chr> <chr> <dttm> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 G1 W1 2022-10-05 00:00:00 Solanu… Slc1 cult… c 55.6 1.31
## 2 G1 W1 2022-10-05 00:00:00 Solanu… Slc2 cult… c 52.7 1.28
## 3 G1 W1 2022-10-05 00:00:00 Solanu… Slc3 cult… c 61.3 1.24
## 4 G1 W1 2022-10-05 00:00:00 Solanu… Slc4 cult… c 51 1.36
## 5 G1 W1 2022-10-05 00:00:00 Solanu… Slc5 cult… c 52.1 1.39
## 6 G1 W1 2022-10-05 00:00:00 Solanu… Slc6 cult… c 54.4 1.24
## 7 G1 W1 2022-10-05 00:00:00 Solanu… Slc7 cult… c 51.6 1.51
## 8 G1 W1 2022-10-05 00:00:00 Amaran… Arc1 wild c 54.6 1.68
## 9 G1 W1 2022-10-05 00:00:00 Amaran… Arc2 wild c 50.9 1.93
## 10 G1 W1 2022-10-05 00:00:00 Amaran… Arc3 wild c 67.2 1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## # Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## # Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## # Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## # Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## # variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
after importing the data and give it the name of d0, we are going to visualize it, by creation different plots.
##Visualization of data with plots. Create many plots.
X= Date Y= Variable Y1= Plant height Y2= Leaf number Y3= Leaf lenght Y4= Leaf width Y5= Leaf area Y7= Chlorophyll
# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)
# first we are going to visualize all the species in a plot
ggplot(d0, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) +
geom_line()+
facet_grid(Species ~.)
## Warning: Removed 3 rows containing missing values (`geom_line()`).
in this code chunk we are try to visualize one species, to know how to plot it first
# For Solanum lycopersicum
s1 <- d0[d0$Species=="Solanum lycopersicum",]
ggplot(s1, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) +
geom_line()
then we will create a for loop to visualize all the species and all the variables
v1 <- c("Plant_height", "Leaf_width", "Leaf_length", "Leaf_area", "Leaf_number", "Root_length", "Chlorophyll_content", "Soil_humidity", "Electrical_conductivity")
i <- "Beta vulgaris"
variable <- "Plant_height"
for(i in levels(as.factor(d0$Species))) {
for(variable in v1) {
s1 <- d0[d0$Species==i, c(variable, "Week", "PlantId", "Treatment")]
s1 <- na.exclude(s1)
p <- ggplot(s1, aes(x= Week, y= .data[[variable]], group= PlantId, color= Treatment)) +
geom_line() +
labs(title = i)
print(p)
}
}
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## Chapter 2 - ANOVA
in this part we are trying to do an ANOVA analysis for the data
```r
#Packages
#install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"), , dependencies = TRUE)
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(broom)
#Import data
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
d0
## # A tibble: 1,260 × 23
## Group Week Date Species PlantId Use Treat…¹ Soil_…² Elect…³
## <chr> <chr> <dttm> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 G1 W1 2022-10-05 00:00:00 Solanu… Slc1 cult… c 55.6 1.31
## 2 G1 W1 2022-10-05 00:00:00 Solanu… Slc2 cult… c 52.7 1.28
## 3 G1 W1 2022-10-05 00:00:00 Solanu… Slc3 cult… c 61.3 1.24
## 4 G1 W1 2022-10-05 00:00:00 Solanu… Slc4 cult… c 51 1.36
## 5 G1 W1 2022-10-05 00:00:00 Solanu… Slc5 cult… c 52.1 1.39
## 6 G1 W1 2022-10-05 00:00:00 Solanu… Slc6 cult… c 54.4 1.24
## 7 G1 W1 2022-10-05 00:00:00 Solanu… Slc7 cult… c 51.6 1.51
## 8 G1 W1 2022-10-05 00:00:00 Amaran… Arc1 wild c 54.6 1.68
## 9 G1 W1 2022-10-05 00:00:00 Amaran… Arc2 wild c 50.9 1.93
## 10 G1 W1 2022-10-05 00:00:00 Amaran… Arc3 wild c 67.2 1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## # Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## # Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## # Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## # Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## # variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
names(d0)
## [1] "Group" "Week"
## [3] "Date" "Species"
## [5] "PlantId" "Use"
## [7] "Treatment" "Soil_humidity"
## [9] "Electrical_conductivity" "Too_dry"
## [11] "Plant_height" "Leaf_number"
## [13] "Leaf_length" "Leaf_width"
## [15] "Leaf_area" "Chlorophyll_content"
## [17] "Aerial_fresh_weight" "Aerial_dry_weight"
## [19] "Root_length" "Roots_fresh_weight"
## [21] "Roots_dry_weight" "Mortality"
## [23] "Comments"
Most visual difference is in week 6 (w6)
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Plant_height") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Plant_height ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Plant_height
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 454 226.9 7.4552 0.0007558 ***
## Species 8 59474 7434.2 244.2441 < 2.2e-16 ***
## Residuals 198 6027 30.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Plant_height ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.8451 -3.1967 -0.2015 2.2747 21.7561
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.9153 1.0076 14.802 < 2e-16 ***
## Treatmenti -0.3329 0.9325 -0.357 0.72152
## Treatments -3.3988 0.9361 -3.631 0.00036 ***
## SpeciesBeta vulgaris 3.9708 1.4991 2.649 0.00873 **
## SpeciesHordeum vulgare 37.2238 1.4745 25.245 < 2e-16 ***
## SpeciesLolium perenne 26.7429 1.4745 18.137 < 2e-16 ***
## SpeciesPortulacea oleracea -6.9810 1.4745 -4.734 4.18e-06 ***
## SpeciesRaphanus sativus 6.1143 1.4745 4.147 5.00e-05 ***
## SpeciesSolanum lycopersicum 44.5286 1.4745 30.199 < 2e-16 ***
## SpeciesSonchus oleraceus 4.5286 1.4745 3.071 0.00243 **
## SpeciesSpinacia oleracea 1.0048 1.4745 0.681 0.49640
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.517 on 198 degrees of freedom
## Multiple R-squared: 0.9086, Adjusted R-squared: 0.904
## F-statistic: 196.9 on 10 and 198 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y= Plant_height, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_number
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 3.27 1.633 1.7577 0.1751
## Species 8 624.58 78.072 84.0148 <2e-16 ***
## Residuals 199 184.92 0.929
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_number
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 334.5 167.25 16.033 3.519e-07 ***
## Species 8 3996.6 499.58 47.891 < 2.2e-16 ***
## Residuals 198 2065.5 10.43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 10.4 5.22 1.3734 0.2556
## Species 8 6314.1 789.27 207.5601 <2e-16 ***
## Residuals 199 756.7 3.80
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 79.8 39.9 6.1425 0.002581 **
## Species 8 30954.5 3869.3 595.6664 < 2.2e-16 ***
## Residuals 198 1286.2 6.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_width
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 2.23 1.113 3.5073 0.03184 *
## Species 8 708.84 88.605 279.2442 < 2e-16 ***
## Residuals 199 63.14 0.317
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_width
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 30.0 15.00 11.645 1.654e-05 ***
## Species 8 5014.3 626.78 486.695 < 2.2e-16 ***
## Residuals 198 255.0 1.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_area
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 202.4 101.19 6.376 0.002137 **
## Species 8 10806.2 1350.77 85.111 < 2.2e-16 ***
## Residuals 170 2698.0 15.87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_area
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 7915 3957.3 30.502 3.99e-12 ***
## Species 8 150426 18803.3 144.930 < 2.2e-16 ***
## Residuals 179 23223 129.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W3" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 107.26 53.630 6.5679 0.002311 **
## Species 3 298.82 99.607 12.1985 1.258e-06 ***
## Residuals 78 636.91 8.166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
###### Week
4
d1 <- d0[d0$Week == "W4" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 15.56 7.780 0.8133 0.4459
## Species 5 533.87 106.774 11.1623 8.283e-09 ***
## Residuals 117 1119.17 9.566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
###### Week
5
d1 <- d0[d0$Week == "W5" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 23.87 11.94 0.8801 0.4168
## Species 6 2018.13 336.36 24.7988 <2e-16 ***
## Residuals 158 2143.01 13.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
###### Week
6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 20.7 10.36 0.5908 0.5551
## Species 6 4832.8 805.47 45.9083 <2e-16 ***
## Residuals 158 2772.1 17.55
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_fresh_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Aerial_fresh_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Aerial_fresh_weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 1673.8 836.91 62.444 < 2.2e-16 ***
## Species 8 5552.5 694.07 51.786 < 2.2e-16 ***
## Residuals 198 2653.7 13.40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Aerial_fresh_weight ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8499 -2.7699 -0.1789 2.4298 12.7110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4232 0.6721 6.581 4.09e-10 ***
## Treatmenti -3.8343 0.6211 -6.173 3.71e-09 ***
## Treatments -6.9069 0.6188 -11.161 < 2e-16 ***
## SpeciesBeta vulgaris 10.7067 0.9824 10.898 < 2e-16 ***
## SpeciesHordeum vulgare 5.6667 0.9824 5.768 3.05e-08 ***
## SpeciesLolium perenne 1.0167 0.9824 1.035 0.301993
## SpeciesPortulacea oleracea 0.6653 0.9824 0.677 0.499097
## SpeciesRaphanus sativus 8.0157 0.9824 8.159 3.84e-14 ***
## SpeciesSolanum lycopersicum 15.2534 0.9824 15.526 < 2e-16 ***
## SpeciesSonchus oleraceus 10.9310 0.9824 11.126 < 2e-16 ***
## SpeciesSpinacia oleracea 3.5238 0.9824 3.587 0.000422 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.661 on 198 degrees of freedom
## Multiple R-squared: 0.7314, Adjusted R-squared: 0.7178
## F-statistic: 53.92 on 10 and 198 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_fresh_weight, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_dry_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Aerial_dry_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Aerial_dry_weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 2.499 1.2496 16.463 2.518e-07 ***
## Species 8 53.307 6.6634 87.789 < 2.2e-16 ***
## Residuals 192 14.573 0.0759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Aerial_dry_weight ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8084 -0.1337 -0.0454 0.1422 1.3776
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.203660 0.050654 4.021 8.34e-05 ***
## Treatmenti -0.087959 0.047789 -1.841 0.0672 .
## Treatments -0.291591 0.047335 -6.160 4.18e-09 ***
## SpeciesBeta vulgaris 0.649612 0.077659 8.365 1.22e-14 ***
## SpeciesHordeum vulgare 0.613333 0.073632 8.330 1.51e-14 ***
## SpeciesLolium perenne 0.080476 0.073632 1.093 0.2758
## SpeciesPortulacea oleracea -0.004286 0.073632 -0.058 0.9536
## SpeciesRaphanus sativus 0.801905 0.073632 10.891 < 2e-16 ***
## SpeciesSolanum lycopersicum 1.464762 0.073632 19.893 < 2e-16 ***
## SpeciesSonchus oleraceus 1.228748 0.079261 15.503 < 2e-16 ***
## SpeciesSpinacia oleracea 0.140476 0.073632 1.908 0.0579 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2755 on 192 degrees of freedom
## Multiple R-squared: 0.7929, Adjusted R-squared: 0.7821
## F-statistic: 73.52 on 10 and 192 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_dry_weight, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Root_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Root_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Root_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 131.2 65.60 1.4067 0.2481
## Species 7 12935.0 1847.86 39.6256 <2e-16 ***
## Residuals 155 7228.1 46.63
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Root_length ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.1485 -3.9889 -0.4197 2.6301 27.3015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5300 1.7526 2.585 0.0107 *
## Treatmenti -0.5187 1.2905 -0.402 0.6883
## Treatments 0.8026 1.3114 0.612 0.5414
## SpeciesBeta vulgaris 15.6315 2.1971 7.114 3.90e-11 ***
## SpeciesHordeum vulgare 20.1084 2.1971 9.152 3.07e-16 ***
## SpeciesLolium perenne 10.3789 2.1971 4.724 5.15e-06 ***
## SpeciesPortulacea oleracea -0.1675 2.1971 -0.076 0.9393
## SpeciesRaphanus sativus 22.9372 2.1971 10.440 < 2e-16 ***
## SpeciesSolanum lycopersicum 20.3896 2.1971 9.280 < 2e-16 ***
## SpeciesSonchus oleraceus 22.6601 2.1971 10.313 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.829 on 155 degrees of freedom
## Multiple R-squared: 0.6438, Adjusted R-squared: 0.6232
## F-statistic: 31.13 on 9 and 155 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Root_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
in this part we are going to do a PCA analysis for the data
#importing data
#we already imported the data in the previous parts, that's why the functions of importing the data are commented
#readxl::read_excel("data_dec/water_stress.xlsx")
#d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
Next we need to make a subset for the data with only numerical variables and exclude the other variables, scale the data and after that we will exclude all the missing data
PCA_data <- d0[c(8:9, 11:21)]
PCA_data_scaled <- as.data.frame(scale(PCA_data))
view(PCA_data_scaled)
# exclude missing values NA
PCA_data01 <- na.exclude(PCA_data_scaled)
now we are going to do a PCA of the data
PCA <- prcomp(PCA_data01, scale = FALSE)
print(PCA)
## Standard deviations (1, .., p=13):
## [1] 3.72417071 1.82689971 1.22163374 0.80635464 0.42812078 0.36826198
## [7] 0.29411880 0.25637763 0.22416993 0.14955205 0.09197022 0.07003550
## [13] 0.05500601
##
## Rotation (n x k) = (13 x 13):
## PC1 PC2 PC3 PC4
## Soil_humidity -0.017254267 0.1700524612 -0.06229738 0.37659356
## Electrical_conductivity -0.009083116 -0.0003728194 0.03074676 0.05696844
## Plant_height -0.302500773 0.4011705238 0.07571629 0.17773513
## Leaf_number 0.383546280 0.1584491520 0.89550212 0.04728623
## Leaf_length -0.203988882 0.2201324847 0.01792070 0.04563054
## Leaf_width -0.376722327 0.3584247733 0.13749368 0.18123028
## Leaf_area -0.406776584 0.1193938737 0.11400102 -0.12319435
## Chlorophyll_content 0.053548438 -0.0083927869 0.11955101 -0.52466043
## Aerial_fresh_weight -0.304647459 0.0095169900 0.07750988 -0.60740918
## Aerial_dry_weight -0.299581093 0.1202823281 0.15375918 -0.17423714
## Root_length -0.234939133 -0.0991357149 0.11158576 0.22920582
## Roots_fresh_weight -0.366927920 -0.7173142276 0.28542344 0.20062516
## Roots_dry_weight -0.191697995 -0.2342223964 0.13204067 0.06036467
## PC5 PC6 PC7 PC8
## Soil_humidity -0.13424503 0.80163704 0.054619942 -0.349430979
## Electrical_conductivity -0.04023364 0.15639691 0.058964791 0.006254404
## Plant_height -0.15738002 -0.19141869 0.378327653 0.174602577
## Leaf_number 0.13279639 0.01096453 0.002447671 -0.064674801
## Leaf_length -0.07783896 -0.16330052 0.087459024 -0.240025870
## Leaf_width -0.20366669 -0.24631698 0.028026619 -0.114076142
## Leaf_area 0.30221451 0.09982567 -0.681340502 0.069971600
## Chlorophyll_content -0.80385605 0.09581169 -0.168912505 -0.037246649
## Aerial_fresh_weight 0.33545490 0.11467123 0.344152239 -0.495270203
## Aerial_dry_weight 0.04126735 0.39789489 0.080838705 0.646357818
## Root_length -0.14842516 -0.11140370 -0.415793499 -0.299757085
## Roots_fresh_weight -0.09667141 -0.01320419 0.189440592 -0.012337067
## Roots_dry_weight -0.08538508 0.07495060 0.124352297 0.114905812
## PC9 PC10 PC11 PC12
## Soil_humidity 0.10723286 0.02625363 0.161719915 -0.02591782
## Electrical_conductivity 0.06503693 -0.19795904 -0.901126383 0.32809096
## Plant_height 0.23015184 0.58379038 -0.004741431 0.21725552
## Leaf_number 0.01099318 0.02575191 0.011445878 -0.02181855
## Leaf_length 0.10924809 0.02157394 -0.216087955 -0.43394879
## Leaf_width -0.08954773 -0.68664130 0.160202064 0.03916263
## Leaf_area 0.45640243 0.07055450 -0.029138806 -0.03138557
## Chlorophyll_content 0.13742629 0.04673116 0.020883278 0.02487670
## Aerial_fresh_weight -0.13745256 0.04877217 0.008387221 0.08335376
## Aerial_dry_weight -0.40822616 -0.07418062 0.059597955 0.04358086
## Root_length -0.64986782 0.34689769 -0.090919143 0.14780741
## Roots_fresh_weight 0.26470736 -0.07207278 0.146368621 0.22593645
## Roots_dry_weight -0.07808654 0.07830177 -0.233897863 -0.75552496
## PC13
## Soil_humidity -0.018399852
## Electrical_conductivity -0.055153190
## Plant_height -0.184937225
## Leaf_number 0.017061335
## Leaf_length 0.749755512
## Leaf_width -0.234986243
## Leaf_area -0.081054975
## Chlorophyll_content -0.009653067
## Aerial_fresh_weight -0.120177840
## Aerial_dry_weight 0.282566160
## Root_length 0.005490618
## Roots_fresh_weight 0.189240668
## Roots_dry_weight -0.456051781
now we will plot the PCA results
# Plotting the PCA results
# install.packages("factoextra")
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
fviz_eig(PCA)
# graph for individuals
fviz_pca_ind(PCA,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# graph of variable
fviz_pca_var(PCA,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)